## "Where are the missing dead" replication
##
## Reproduce: The boarder line cases Analysis
#
##			  - Figure 3 (a) & (b) outcome plot
##			  - Figure 3 (c) DD plot
##
##			  - Appendix Table A6
## 			  
## 
## Input:     main.dta


## Guoer Liu
## 7/28/2022 


setwd("~/Dropbox/Missing dead/replication/")


library(haven)
library(estimatr)
library(scales)
library(modelsummary) ## modelsummary(): for lm_robust object
library(extrafont) ## change fonts in plot


### Function 

dataCollapse <- function(vector, year.v, treat.ind){
	# Args:
	#	vector: 	n x 1 vector to be collapsed by province-treat
	#	year.v: 	n x 1 vector
	#   treat.ind:  n x 1 vector of treatment indicator
	out <- tapply(vector, 
				  list(year.v, treat.ind), 
				  function(x) mean(x, na.rm = T))
	return(out)
}


paraPlot <- function(collapMat, 
					matrix.x.lab = NULL, matrix.title = NULL,
					l.mar = 3.5,
					hline = NULL,
					x.axis.vec = NULL,
					y.lab = TRUE, y.lab.names = NULL,
					y.min = NULL, y.max = NULL,
					legend = TRUE, leg.pos = NULL){
	# Args:
	#	collapMat:      n x 2 matrix (column 1: control; column 2: treat)
	#   matrix.x.lab:   a string of figure x axis label
	#	matrix.title:	a string of figure title
	#	hline:			a scalar indicate the position of vertical line
	#	x.axis.vec:		a length n string vector that shows x axis
	#   y.lab:			logical parameter to turn off y label
	#   y.lab.names:    a string of y axis lable
	#   y.min:			a scalar indicates the minimum value on y axis
	#   y.max:			a scalar indicates the maximum value on y axis
	#   legend:			logical parameter to turn off legend
	#	leg.pos:		a string indicates legend position
	pch.cex <- 1.6
	ctl.v <- collapMat[, 1]
	tr.v <- collapMat[, 2]
	#y.max <- max(x)
	#y.min <- min(x)
	x.len <- nrow(collapMat)

	par(mar= c(4, l.mar, 1, 1))
	plot(NA, xlim = c(0.5, x.len), 
	         ylim = c(y.min, y.max), 
	         type = "n", 
	         xlab = matrix.x.lab,
	         main = matrix.title, 
	         ylab = "", axes = F, cex.lab = 1.3)
	grid(nx = NULL, ny = NULL,
     	 lty = 2,      # Grid line type
     	 col = "gray", # Grid line color
     	 lwd = 0.6)
	abline(v = hline, col = alpha("orangered", 0.8), lwd = 1.5)
	lines(ctl.v, lty = 2, lwd = 1.5)
	points(ctl.v, cex = pch.cex, pch = 1, col = "black")
	lines(tr.v, lty = 1, lwd = 1.5)
	points(tr.v, cex = pch.cex, pch = 19, col = "black", bg = "black")
	axis(1, at = seq(1, x.len, 1), label = x.axis.vec, 
		 cex.axis = 1, las = 1)
	if (y.lab == TRUE){
		axis(2, at = seq(ceiling(y.min), floor(y.max), 1), 
			label = seq(ceiling(y.min), floor(y.max), 1),
			cex.axis = 1, las = 1)
		title(ylab = y.lab.names, line = 2, cex.lab = 1)
	}
	if (legend == TRUE){
		legend(leg.pos, title = NULL, 
			   legend = c("Treated", "Control"), 
			   col = "black", 
			   lty = c(1, 2), cex = 0.8,
			   pch = c(19, 1),
			   lwd = c(1, 1))
	}
}




mydata <- read_dta("main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)


outcome <- c("death2cont_py_lg", "death3cont_py_lg")

cov.names <- c("log_pop2", "rural_pop_pct",
			   "log_light", "log_total_tax_rev_pc",
			   "log_coal_produ0", "log_mine_indwage",
			   "log_traf_indwage",
			   "sec_tty", "sec_age", 
			   "sec_promotion", "gov_tty", 
			   "gov_age", "gov_promotion")

t.var <- c("did", "treat_time", "treat_group")

rhs.1 <- paste('~', paste(t.var, collapse = " + "))
rhs.2 <- paste('~', paste(c(t.var, cov.names), collapse = " + "))
rhs.3 <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
						  "+ factor(year)")
rhs.4 <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
						  "+ factor(year) + factor(prov_id)")
rhs.list <- list(rhs.1, rhs.2, rhs.3, rhs.4)


estimate <- NULL
SE <- NULL
mod.out <- list()

for(i in outcome){
	for(j in 1:length(rhs.list)){
		formula <- as.formula(paste(i, rhs.list[[j]]))
		mod <- lm_robust(formula, cluster = prov_id, data = mydata)
		estimate <- c(estimate, coefficients(mod)['did'])
		SE <- c(SE, summary(mod)$coefficients['did', 2])
		mod.out <- append(mod.out, list(mod))
		## rank deficient due to fixed effects drop 1
	}
}




## Table:

names(mod.out) <- paste0("(", seq(1, 8, 1), ") ", 
						rep(c("Acc. count with 2 Deaths", 
							  "Acc. count with 3 Deaths"), each = 4))


print(modelsummary(mod.out, 
	  coef_map = c("did", "treat_time", "treat_group"),
	  stars = FALSE,
	  output = "table.tex"))


## Plots: Fig 3 (a) & (b) outcome plot

death2cont_pylg_tr <- dataCollapse(mydata$death2cont_py_lg, 
								mydata$year, mydata$treat_group)
death3cont_pylg_tr <- dataCollapse(mydata$death3cont_py_lg, 
								mydata$year, mydata$treat_group)

#pdf("f3_death2_3_outcome.pdf", 
#	family = "Times New Roman", width = 8, height = 3.8)
par(mfrow = c(1, 2), mgp=c(2.5, 0.5, 0))
paraPlot(death2cont_pylg_tr, "Acc. with Exactly Two Deaths", NULL,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = TRUE, y.lab.names = "Log (Acc. count)",
		y.min = 0.0, y.max = 4,
		legend = FALSE, leg.pos = "bottomright")
paraPlot(death3cont_pylg_tr, "Acc. with Exactly Three Deaths", NULL, 
		l.mar = 1.5,
		hline = 4, x.axis.vec = seq(2000, 2005, 1), 
		y.lab = FALSE,
		y.min = 0.0, y.max = 4,
		legend = TRUE, leg.pos = "bottomright")
#dev.off()


## Plot: Fig 3 (c) DID <main>

estimate.main <- estimate[c(4, 8)] # main specification with all controls & FEs
SE.main <- SE[c(4, 8)]

#pdf("f3_death2_3_did_main.pdf", family = "Times New Roman", 
#	width = 4.2, height = 3.8)
	y.ind <- c(1, 2)
	y.ind <- rev(y.ind)

	par(mar= c(3,2,0,1), mgp=c(2, 0.5, 0))
	plot(NA, xlim = c(-2, 2), 
	         ylim = c(0.5, 2.5), 
	         type = "n", 
	         xlab = "DD Coefficient", 
	         ylab = "", axes = F, cex.lab = 1.1)
	segments(x0 = estimate.main - qnorm(0.95) * SE.main, 
	         y0 = y.ind, 
	         x1 = estimate.main + qnorm(0.95) * SE.main, 
	         y1 = y.ind, 
	         col = alpha("black", 0.45), lwd = 6)
	segments(x0 = estimate.main - qnorm(0.975) * SE.main, 
	         y0 = y.ind, 
	         x1 = estimate.main + qnorm(0.975) * SE.main, 
	         y1 = y.ind, 
	         col = "black", lwd = 2)
	points(estimate.main, y.ind, cex = 2, pch = 19, 
	       col = "black", bg = "black")
	axis(1, at = seq(-2, 2, 1), cex.axis = 1, las = 1)
	text(0, 2.4, labels = "Accidents with Exactly Two Deaths", 
		las = 2, cex = 1.3, font = 2)
	text(0, 1.4, labels = "Accidents with Exactly Three Deaths", 
		las = 2, cex = 1.3, font = 2)
	abline(v = 0, col = "gray", lty = 2, lwd = 0.8)
	legend("bottomright", title = "CI", 
		   legend = c("95%", "90%"), 
		   col = c("black", alpha("black", 0.45)), 
		   lty = c(1, 1), cex = 0.8,
		   lwd = c(1, 4))
#dev.off()


## Plot: Fig 3 (c) DID <all>

#pdf("f3_death2_3_did.pdf", family = "Times New Roman", 
#	width = 4.2, height = 3.8)
	y.ind <- c(1.5, 1.9, 2.3, 2.7, 4, 4.4, 4.8, 5.2)
	y.ind <- rev(y.ind)

	par(mar= c(3,2,0,1), mgp=c(2, 0.5, 0))
	plot(NA, xlim = c(-2, 2), 
	         ylim = c(1.1, 5.7), 
	         type = "n", 
	         xlab = "DD Coefficient", 
	         ylab = "", axes = F, cex.lab = 1.1)
	segments(x0 = estimate - qnorm(0.95) * SE, 
	         y0 = y.ind, 
	         x1 = estimate + qnorm(0.95) * SE, 
	         y1 = y.ind, 
	         col = alpha("black", 0.45), lwd = 6)
	segments(x0 = estimate - qnorm(0.975) * SE, 
	         y0 = y.ind, 
	         x1 = estimate + qnorm(0.975) * SE, 
	         y1 = y.ind, 
	         col = "black", lwd = 2)
	points(estimate, y.ind, cex = 2, pch = 19, 
	       col = "black", bg = "black")
	axis(1, at = seq(-2, 2, 1), cex.axis = 1, las = 1)
	axis(2, at = y.ind, 
			labels = rep(paste0("(", seq(1, 4, 1), ")"), 2), 
			las = 2, cex.axis = 1, tick = F)
	text(0, 5.6, labels = "Accidents with Exactly Two Deaths", 
		las = 2, cex = 1.3, font = 2)
	text(0, 3.1, labels = "Accidents with Exactly Three Deaths", 
		las = 2, cex = 1.3, font = 2)
	abline(v = 0, col = "gray", lty = 2, lwd = 0.8)
	legend("bottomright", title = "CI", 
		   legend = c("95%", "90%"), 
		   col = c("black", alpha("black", 0.45)), 
		   lty = c(1, 1), cex = 0.8,
		   lwd = c(1, 4))
#dev.off()



## check missingness
summary(mydata$death2cont_py_lg)
sd(mydata$death2cont_py_lg, na.rm = T)

summary(mydata$death3cont_py_lg)
sd(mydata$death3cont_py_lg, na.rm = T)

summary(mydata$totcount_py)
sd(mydata$totcount_py, na.rm = T)








